R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

#setwd('~/./Writing programs') #changes file location
wnv <- read.csv("wnv.csv") #adds wnv.csv dataset to wnv
#head(wnv) #check first 6 lines of dataset

library(ggplot2) #loads ggplot2 library
## Warning: package 'ggplot2' was built under R version 3.4.4
library(plotly)
## Warning: package 'plotly' was built under R version 3.4.4
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Histogram of the number of records per year, coloured by state.
# The binned variable is Year, so the x axis is labelled "Year" (it was
# previously labelled "State", which is only the fill colour).
# The output is not very informative on its own; facet by year instead.
ggplot(data = wnv) +
  geom_histogram(aes(x = Year, fill = State)) +
  labs(x = "Year", y = "Total", title = "WNV infection frequency in the USA",
       caption = "Data from: the interweb")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplotly(p=ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of Total (cases per record), stacked by state.
# coord_cartesian() zooms the y axis to 0-50 without discarding any data.
# scale_y_continuous(limits = ...) removed every bar segment reaching past 50
# (the "Removed 35 rows containing missing values" warning), which silently
# distorted the stacked bars.
# The x axis shows Total, so it is labelled as such; y is the bin count.
ggplot(data = wnv, mapping = aes(x = Total)) +
  geom_histogram(mapping = aes(fill = State)) +
  coord_cartesian(ylim = c(0, 50)) +
  labs(x = "Total", y = "Count",
       title = "WNV infection frequency in the USA, a state analysis",
       caption = "Data from the interweb")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplotly(p=ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of Total with a log10 x axis.
# scale_x_log10() has to be joined to the plot with `+`. Placed on its own
# line after a chain that has already ended, it is evaluated as a separate
# expression (printing a bare scale object) and never reaches the plot.
# coord_cartesian() zooms the y axis to 0-50 without dropping bar segments,
# unlike scale_y_continuous(limits = ...).
# NOTE(review): records with Total == 0 become -Inf on a log scale and are
# removed with a warning - confirm whether the data contains any.
ggplot(data = wnv, mapping = aes(x = Total)) +
  geom_histogram(mapping = aes(fill = State)) +
  scale_x_log10() +
  coord_cartesian(ylim = c(0, 50)) +
  labs(x = "Total (log10 scale)", y = "Count",
       title = "WNV infection frequency in the USA, a state analysis",
       caption = "Data from the interweb")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplotly(p=ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Faceting histogram with axis limits.
# One panel per Year; within each panel Total is binned and stacked by state.
# The x axis shows Total (Year is the facet variable), so it is labelled
# accordingly, and the stray ";" in the y label is removed.
ggplot(data = wnv, mapping = aes(x = Total)) +
  geom_histogram(mapping = aes(fill = State)) +
  facet_wrap(~ Year) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(x = "Total", y = "Count",
       title = "WNV infection frequency in the USA, a state analysis",
       caption = "Data from the interweb")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplotly(p=ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#### Calculate case fatality rate ####

# Case fatality rate (cfr): fatal cases as a fraction of total cases.
# The result is stored as a new cfr column on the in-memory wnv data frame.
wnv[["cfr"]] <- wnv[["Fatal"]] / wnv[["Total"]]

# Show the first rows of the data (head() defaults to six), now including cfr
head(wnv)
##         State Year EncephMen Fever Other Total Fatal Latitude Longitude
## 1    New York 1999        59     3     0    62     7    42.54    -75.28
## 2 Connecticut 2000         0     1     0     1     0    41.51    -72.76
## 3  New Jersey 2000         5     1     0     6     1    40.17    -74.72
## 4    New York 2000        14     0     0    14     1    42.54    -75.28
## 5     Alabama 2001         2     0     0     2     1    32.28    -86.92
## 6 Connecticut 2001         6     0     0     6     1    41.51    -72.76
##          cfr
## 1 0.11290323
## 2 0.00000000
## 3 0.16666667
## 4 0.07142857
## 5 0.50000000
## 6 0.16666667
# Histogram of the case fatality rate, stacked by state.
# geom_histogram() is called without naming the `mapping` argument; aes() is
# still matched positionally, so the plot works the same.
# The plot-level aes(x = Total) was dropped: the layer maps x to cfr, so the
# global mapping was never used and only obscured what is being plotted.
ggplot(data = wnv) +
  geom_histogram(aes(x = cfr, fill = State)) +
  scale_y_continuous(limits = c(0, 100)) +
  labs(x = "Case fatality rate", y = "Count",
       title = "WNV case fatality rate in the USA, a state analysis",
       caption = "Data from the interweb")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplotly(p=ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Facet the case fatality rate histogram: one panel per Year.
# The y axis maximum was subsequently changed to 25, as 100 was too much when
# initially plotted.
# The plot-level aes(x = Total) was dropped: the layer maps x to cfr, so the
# global mapping was never used.
ggplot(data = wnv) +
  geom_histogram(aes(x = cfr, fill = State)) +
  facet_wrap(~ Year) +
  scale_y_continuous(limits = c(0, 25)) +
  labs(x = "Case fatality rate", y = "Count",
       title = "WNV case fatality rate in the USA, a state analysis",
       caption = "Data from the interweb")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplotly(p=ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#CHANGE YEAR FROM NUMERICAL TO FACTOR
wnv$Year <- as.factor(wnv$Year)

# Graph total numbers by state and rotate the x axis labels
# (this is total numbers, not cfr).
# geom_col() draws bars whose heights are the y values themselves. It replaces
# geom_histogram(stat = 'identity'), which produced the same bars but warned
# "Ignoring unknown parameters: binwidth, bins, pad".
# Year is a factor at this point, so fill gives one discrete colour per year.
ggplot(data = wnv, mapping = aes(x = State, y = Total, fill = Year)) +
  geom_col() +
  labs(x = "State", y = "Total",
       title = "WNV infection frequency in the USA by state",
       caption = "Data from the interweb") +
  theme(axis.text.x = element_text(angle = -90, hjust = 0))

ggplotly(p=ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
#remove negative sign to swap around text - test & rerun code to see!

# Graph case fatality ratio (cfr) by state and rotate the x axis labels.
# geom_col() draws bars whose heights are the y values themselves. It replaces
# geom_histogram(stat = 'identity'), which warned
# "Ignoring unknown parameters: binwidth, bins, pad".
# The y axis shows cfr, so it is labelled as such rather than "Total".
# NOTE(review): bars stack the per-year cfr values, so a bar's height is a sum
# of ratios across years - confirm this is the intended summary.
ggplot(data = wnv, mapping = aes(x = State, y = cfr, fill = Year)) +
  geom_col() +
  labs(x = "State", y = "Case fatality ratio",
       title = "WNV case fatality ratio in the USA by state",
       caption = "Data from the interweb") +
  theme(axis.text.x = element_text(angle = -90, hjust = 0))

ggplotly(p=ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
#### calculate mean and sd ####

#Write a function to calculate the mean and standard error (standard deviation
#divided by the square root of the sample size) of the neuroinvasive disease rate for all the
#states in a given list and given set of years. Follow the Google R style and remember to place
#the function near the top of your script. Use your function to calculate the average severe
#disease rate in California, Colorado, and New York

# Neuroinvasive (encephalitis/meningitis) counts for every record in wnv
EncephMen <- wnv[["EncephMen"]]

# Arithmetic mean of a numeric vector.
#
# NOTE: this definition masks base::mean() for the rest of the script, so it
# accepts the commonly used na.rm argument to stay a usable substitute.
#
# Args:
#   x: Numeric vector.
#   na.rm: If TRUE, NA values are removed before averaging. Defaults to FALSE,
#          in which case any NA in x makes the result NA.
#
# Returns:
#   The sum of the values divided by the number of values; NaN for an empty
#   vector (0 / 0).
mean <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sum(x) / length(x)
}

mean(EncephMen) #as I have stipulated x= EncephMen in wnv dataset 
## [1] 40.68382
sd(EncephMen)
## [1] 79.81267
# Standard error of the mean of a numeric vector.
#
# Args:
#   x: Numeric vector.
#
# Returns:
#   The square root of (sample variance / number of observations), which is
#   the standard deviation divided by the square root of the sample size.
standard.error <- function(x) {
  sample_variance <- var(x)
  sample_size <- length(x)
  sqrt(sample_variance / sample_size)
}

standard.error(EncephMen)
## [1] 4.839354
#NOW CALCULATE FOR CALIFORNIA, COLORADO, AND NEW YORK ONLY
#NEED TO WORK WITH SUBSET OF DATA

# EncephMen counts for the California records only.
# The former `(data = subset(...))` wrapper also assigned the result to a
# stray global variable named `data`; only the intended variable is set now.
EncephMenCaliforniaSubset <- subset(wnv$EncephMen, wnv$State == "California")

mean(EncephMenCaliforniaSubset)
## [1] 137.6667
sd(EncephMenCaliforniaSubset)
## [1] 133.7844
standard.error(EncephMenCaliforniaSubset)
## [1] 54.61725
# EncephMen counts for the Colorado records only.
# The former `(data = subset(...))` wrapper also assigned the result to a
# stray global variable named `data`; only the intended variable is set now.
EncephMenColoradoSubset <- subset(wnv$EncephMen, wnv$State == "Colorado")

mean(EncephMenColoradoSubset)
## [1] 142.3333
sd(EncephMenColoradoSubset)
## [1] 236.8136
standard.error(EncephMenColoradoSubset)
## [1] 96.67873
# EncephMen counts for the New York records only.
# The former `(data = subset(...))` wrapper also assigned the result to a
# stray global variable named `data`; only the intended variable is set now.
EncephMenNewYorkSubset <- subset(wnv$EncephMen, wnv$State == "New York")

mean(EncephMenNewYorkSubset)
## [1] 31.11111
sd(EncephMenNewYorkSubset)
## [1] 23.63496
standard.error(EncephMenNewYorkSubset)
## [1] 7.878318

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.